require(ggplot2)
require(ggthemes)
require(outliers)
require(EnvStats)


rm(list=ls())


#### This script reads in all the data that we collected from the fish and runs the stats.
    # This includes treatment received, Fibrosis score (Meghan), stiffness (slope of semi-log regression of force vs. curvature),
    # max body curvature during C-starts, Cross sectional area of the body, and other variables.
  # It also runs a size correction on the stiffness data, using the residuals of stiffness vs. cross sectional area to represent size corrected stiffness.

allData <- read.csv('FinalAllDataStickleback_April20.2022.csv')
dataNoVid <- read.csv('FinalAllNoVidData_April20.2022.csv')




#### First create a variable to represent fibrosis level as an ordinal factored variable. ####
allData$Fibrosis.Score.Factor <- factor(allData$Fibrosis.Score, levels=as.character(c("0","0.25","0.5","0.75","1","1.5","2","2.5","3","3.5","4")), ordered=TRUE)
levels(allData$Fibrosis.Score.Factor)

dataNoVid$Fibrosis.Score.Factor <- factor(dataNoVid$Fibrosis.Score, levels=as.character(c("0","0.25","0.5","0.75","1","1.5","2","2.5","3","3.5","4")), ordered=TRUE)
levels(dataNoVid$Fibrosis.Score.Factor)
      #Define levels of the Treatment variable for plotting consistency
allData$Treatment <- factor(allData$Treatment, levels=c("PBS","Alum"))
dataNoVid$Treatment <- factor(dataNoVid$Treatment, levels=c("PBS","Alum"))






#### Do size corrections on the force vs. curvature data ####
lmSizeCorr <- lm(data=dataNoVid, SLslope~CSA)
summary(lmSizeCorr)
dataNoVid$SLslopeResid <- residuals(lmSizeCorr)
dataNoVid$predict <- predict(lmSizeCorr)
sizeCorrR2 <- as.character(round(summary(lmSizeCorr)$r.squared,digits = 3))

ggplot(dataNoVid, aes(x=CSA,y=SLslope, colour=Treatment)) + 
  geom_point(aes(),size=2,alpha=1) +
  stat_smooth(method = "lm", col = "red") +
  annotate("text",x=.5,y=4, label=expression('R'^2~'=.135'), parse=FALSE) +
  labs(x="Cross-sectional area", y="Semi-log Slope (stiffness)", title="Stiffness Size Correction") +
  theme_few()


ggplot(dataNoVid, aes(x=CSA,y=SLslope)) +
  geom_smooth(method = "lm", se = FALSE, color = "lightgrey") +
  geom_point() +
  geom_point(aes(y = predict), shape = 1) +
  geom_segment(aes(xend = CSA, yend = predict), alpha=0.3) +
  theme_few()

ggplot(dataNoVid, aes(x=Treatment,y=SLslope, colour=Treatment)) + 
  geom_boxplot() +
  theme_few()

#Outlier detection
#I use the Rosner test because it can simultaneously detect multiple outliers.
#k is the expected number of outliers
rosnerTest(dataNoVid$SLslopeResid, k=4)
#It says that all 4 suspected values are outliers, so we discard them.

dataNoVidOutlier <- dataNoVid[-c(2,5,13,14),]




#### Redo size corrections on the force vs. curvature data without the outliers ####
lmSizeCorrOutlier <- lm(data=dataNoVidOutlier, SLslope~CSA)
summary(lmSizeCorrOutlier)
dataNoVidOutlier$SLslopeResid <- residuals(lmSizeCorrOutlier)
dataNoVidOutlier$predict <- predict(lmSizeCorrOutlier)
sizeCorrR2Outl <- as.character(round(summary(lmSizeCorrOutlier)$r.squared,digits = 3))

ggplot(dataNoVidOutlier, aes(x=CSA,y=SLslope, colour=Treatment)) + 
  geom_point(aes(),size=2,alpha=1) +
  stat_smooth(method = "lm", col = "red") +
  annotate("text",x=.5,y=4, label=expression('R'^2~'=.397'), parse=FALSE) +
  labs(x="Cross-sectional area", y="Semi-log Slope (stiffness)", title="Stiffness Size Correction") +
  theme_few()


ggplot(dataNoVidOutlier, aes(x=CSA,y=SLslope)) +
  geom_smooth(method = "lm", se = FALSE, color = "lightgrey") +
  geom_point() +
  geom_point(aes(y = predict), shape = 1) +
  geom_segment(aes(xend = CSA, yend = predict), alpha=0.3) +
  theme_few()





#### Statistics ####
  #Path analysis is in another script


#Structure of the argument
    #Before I can do anything else, I have to show that it is reasonable to assume that kinematics are correlated with performance
    #Since I don't care here about maximum performance, I do this on all the data (not just max values)
      #So I do either linear models or path analysis of kinematics to performance, without fibrosis data
    #Then once I establish the connection there, I switch to the max performance data, and do the full path of treatment to performance
      #This is my hypothesis, and the result that I focus on. The stats with all the data is just there to confirm the hypothesized path from kinematics to performance.

#First run models using all the data (every recorded escape response). This data set does NOT assume maximum performance.
    #Use this data set for models that involve variables measured in EVERY escape response (such as curvature, angular velocity, and linear displacement)



lmAD1 <- lm(data=allData, angVel~maxCurve)
lmAD2 <- lm(data=allData, comVelMMms~maxCurve)

plot(data=allData, angVel~maxCurve)
summary(lmAD1)
plot(data=allData, comVelMMms~maxCurve)
summary(lmAD2)
#Curvature predicts angular velocity but not linear velocity. Makes some sense.
#Though this is in all data, not just maximum performances.

#Does trial # matter?

lmAD3 <- lm(data=allData, maxCurve~Sequence+Animal)
summary(lmAD3)

lmAD4 <- lm(data=allData, angVel~Sequence+Animal)
summary(lmAD4)

lmAD5 <- lm(data=allData, comVelMMms~Sequence+Animal)
summary(lmAD5)
    #No it does not affect any kinematics or performance variables

                #These results suggest that our proposed model where body curvature predicts performance works for at least one performance metric.


#Test the direct effect of treatment on each variable.
#The point of this that we can find some effects, but we can't see the overall effect of treatment on final performance metrics.
#It is only by putting it all into a hierarchical path analysis that we can pick out the true effects

lmDE1 <- lm(data=allData, Fibrosis.Score~Treatment)
summary(lmDE1)

lmDE2 <- lm(data=allData, maxCurve~Treatment)
summary(lmDE2)

lmDE3 <- lm(data=dataNoVidOutlier, Fibrosis.Score~Treatment)
summary(lmDE3)

lmDE4 <- lm(data=dataNoVidOutlier, Highest.Max.Curve~Treatment)
summary(lmDE4)